
rm(list = ls())
set.seed(123)
Sys.setenv(LANG = "en")
setwd("/Users/wangtao/Desktop/Instrument_effect_data/Baseline removed SERS data")
library(glmnet)
library(fossil)
library(randomForest)
library(e1071)
library(keras)
library(tensorflow)
library(fda)
library(caret)
library(tidymodels)
library(deepviz)
library(pracma)
library(provenance)

max_raman_shift<-1350



# Using measurements from Firstdefender, Firstdefender, Tec 5 and first defender to predict corresponding measurements in Renishaw
RSQUARE = function(y_actual,y_predict){
  cor(y_actual,y_predict)^2
}

normalize <- function(x, na.rm = TRUE) {
  return((x- min(x)) /(max(x)-min(x)))
}
# normalize <- function(x, na.rm = TRUE) {
#   return((x- mean(x)) /sd(x))
# }

accuracy<-function(predicted_lables,true_lables){
  return(sum(predicted_lables == true_lables) / length(true_lables))
}

rand_index<-function(predicted_labels,true_labels){
  predicted_labels<-as.factor(predicted_labels)
  levels(predicted_labels)<-1:nlevels(predicted_labels)
  predicted_labels<-as.numeric(predicted_labels)
  true_labels<-as.factor(true_labels)
  levels(true_labels)<-1:nlevels(true_labels)
  true_labels<-as.numeric(true_labels)
  return(rand.index(predicted_labels,true_labels))
}

convert_long_to_wide<-function(labels_long){
  labels_long<-as.factor(labels_long)
  ID<-1:length(labels_long)
  curr_frame<-data.frame(id=ID,labels=labels_long)
  curr_table<-table(curr_frame$id,curr_frame$labels)
  curr_matrix<-matrix(curr_table,ncol = nlevels(labels_long))
  colnames(curr_matrix)<-unique(labels_long)
  return(curr_matrix)
}

acc_each<-function(predicted_labels,true_labels){
  TF_table<-(predicted_labels==true_labels)
  acc_each_matrix<-data.frame(matrix(NA,nrow = length(unique(true_labels)),ncol = 4))
  curr_count=1
  colnames(acc_each_matrix)<-c('Accuracy','Number of spectrum','majority',"count of majority")
  for(curr_label in unique(true_labels)){
    acc_each_matrix[curr_count,1]<-sum(TF_table[which(true_labels==curr_label)])/length(which(true_labels==curr_label))
    rownames(acc_each_matrix)[curr_count]<-curr_label
    acc_each_matrix[curr_count,2]<-length(which(true_labels==curr_label))
    acc_each_matrix[curr_count,3]<-tail(names(sort(table(predicted_labels[which(true_labels==curr_label)]))), 1)
    acc_each_matrix[curr_count,4]<- as.numeric(tail((sort(table(predicted_labels[which(true_labels==curr_label)]))), 1))
    curr_count=curr_count+1
  }
  return(acc_each_matrix)
}

################## read average RS and Firstdefender specturm
# read in all analytes_set analytes of Firstdefender
analytes_set<-20 # no mixture

combined_analytes_Firstdefender<-1:(max_raman_shift)
for(i in list.files('Firstdefender/')[1:analytes_set]){
  curr_analyte<-read.csv(paste0("Firstdefender/",i),sep = '\t',header = TRUE)[1:max_raman_shift,]
  combined_analytes_Firstdefender<-cbind(combined_analytes_Firstdefender,rowMeans(curr_analyte[,2:ncol(curr_analyte)]))
}
colnames(combined_analytes_Firstdefender)[2:ncol(combined_analytes_Firstdefender)]<-list.files('Firstdefender/')[1:analytes_set]

# read in all analytes_set analytes of Renishaw
RS_num<-NULL
combined_analytes_RS<-1:(max_raman_shift)
for(i in list.files('Renishaw/')[1:analytes_set]){
  curr_analyte<-read.csv(paste0("Renishaw/",i),sep = '\t',header = TRUE)[1:max_raman_shift,]
  combined_analytes_RS<-cbind(combined_analytes_RS,rowMeans(curr_analyte[,2:ncol(curr_analyte)]))
  RS_num<-c(RS_num,ncol(curr_analyte)-1)
}
colnames(combined_analytes_RS)[2:ncol(combined_analytes_RS)]<-list.files('Renishaw/')[1:analytes_set]
min_RS<-min(RS_num)

############ peak calling
peak_idx_com<-NULL
for(i in 1:20){
  curr_combined_RS<-normalize(combined_analytes_RS[,i+1])
  peak_idx<-findpeaks(curr_combined_RS,nups = 3,ndowns = 3, threshold=0.0001,npeaks = 5,minpeakheight = 0.4)
  peak_idx_com<-c(peak_idx_com,peak_idx[,2])
}
peak_idx_com<-unique(peak_idx_com)

######################### wavelet decomposition of raw RS data
raw_analytes_RS<-seq(1:(max_raman_shift+1))
for(i in list.files('Renishaw/')[1:analytes_set]){
  set.seed(123)
  curr_analyte<-read.csv(paste0("Renishaw/",i),sep = '\t',header = TRUE)[1:max_raman_shift,]
  curr_cols<-sample(2:ncol(curr_analyte),min_RS)
  curr_analyte<-curr_analyte[,c(1,curr_cols)]
  curr_analyte<-rbind(curr_analyte,rep(i,ncol(curr_analyte)))
  raw_analytes_RS<-cbind(raw_analytes_RS,curr_analyte[,-1]) 
}
colnames(raw_analytes_RS)<-NA

raw_analytes_RS=t(raw_analytes_RS[,-1]) # last column contains labels
x=raw_analytes_RS[,1:max_raman_shift]
storage.mode(x)<-'numeric'
X<-matrix(x,ncol =max_raman_shift)
x<-t(apply(x, 1, normalize))
x<-x[,peak_idx_com]
Y=as.factor(raw_analytes_RS[,max_raman_shift+1])
X<-t(apply(X, 1, normalize))
X<-X[,peak_idx_com]

input_dim<-ncol(X)
output_dim<-analytes_set
Sys.setenv(TF_XLA_FLAGS="--tf_xla_enable_xla_devices")

inputs <- layer_input(shape = c(input_dim,1))

conv_block <- inputs %>%
  layer_conv_1d(filter = 32, kernel_size = 3,  padding = "same") %>%
  layer_activation("relu") %>%
  layer_conv_1d(filter = 32, kernel_size = 3,  padding = "same") %>%
  layer_activation("relu") %>%
  layer_conv_1d(filter = 32, kernel_size = 3,  padding = "same") %>%
  layer_activation("relu") %>%
  #Second hidden layer
  layer_conv_1d(filter = 32, kernel_size = 3,  padding = "same") %>%
  layer_activation("relu")

output <- layer_add(c(inputs, conv_block)) %>%
  
  # Flatten max filtered output into feature vector
  # and feed into dense layer
  layer_conv_1d(filters = 32, kernel_size = 7, strides = 2, input_shape = c(input_dim, 1)) %>%
  layer_batch_normalization() %>%
  layer_activation('relu')%>%
  layer_conv_1d(filters = 32, kernel_size = 3, strides = 1, padding = "same") %>%
  layer_batch_normalization() %>%
  layer_activation("relu") %>%
  layer_conv_1d(filters = 32, kernel_size = 3, strides = 1, padding = "same") %>%
  layer_batch_normalization()%>%
  layer_conv_1d(filters = 32, kernel_size = 3, strides = 1, padding = "same") %>%
  layer_batch_normalization()%>%
  layer_flatten() %>%
  layer_dense(50) %>%
  layer_activation("relu") %>%
  layer_dropout(0.5) %>%
  # Outputs from dense layer are projected onto output layer
  layer_dense(output_dim) %>%
  layer_activation("softmax")

model<- keras_model(inputs = inputs,
                    outputs = output)

learning_rate <- learning_rate_schedule_exponential_decay(
  initial_learning_rate = 5e-3,
  decay_rate = 0.96,
  decay_steps = 1500,
  staircase = TRUE
)
opt <- optimizer_adamax(learning_rate = learning_rate)

loss <- loss_sparse_categorical_crossentropy(from_logits = TRUE)

model %>% compile(
  loss = loss,
  optimizer = opt,
  metrics = "accuracy"
)
model %>% plot_model()

Y_nn<-Y
levels(Y_nn)<-1:nlevels(Y)
Y_nn<-as.numeric(Y_nn)-1
set.seed(123)
history <- model %>% fit(
  X,Y_nn,
  batch_size = 32,
  epochs = 150,
  #validation_split = 0.2,
  #validation_data = list(x_test, y_test),
  shuffle = TRUE,
  #callback_early_stopping()
)



########################## XG boost
library(xgboost)
train_matrix<-xgb.DMatrix(data = X, label = Y_nn)
numberOfClasses <- length(unique(Y_nn))+1
xgb_params <- list("objective" = "multi:softmax",
                   "eval_metric" = "mlogloss",
                   "num_class" = numberOfClasses)
nround    <- 100 # number of XGBoost rounds

# Fit cv.nfold * cv.nround XGB models and save OOF predictions
xgbcv <- xgboost(params = xgb_params,
                 data = train_matrix, 
                 nrounds = nround,
                 verbose = TRUE,
                 #prediction = TRUE,
                 early_stopping_rounds=3)
library(dplyr)
xgb_conf_mat <- predict(xgbcv,newdata=X)

cat("XGB Training Accuracy :", sum(xgb_conf_mat==Y_nn)/length(Y_nn), "\n")


######################## SVM
dat<-data.frame(x,y=as.factor(raw_analytes_RS[,max_raman_shift+1]))
#svm_tune <- tune(svm, y~.,data=dat, kernel="radial", ranges=list(cost=10^(-2:2), gamma=2^(-2:2)))
svm_c<-svm(y~.,data=dat,kernel='radial')
svm_p<-predict(svm_c,X)
accuracy(svm_p,Y) # 1



######################## 10 repetitions at different data partitions
rep=30
training_size_set<-round(analytes_set*c(0.7,0.75,0.8,0.95))
beta1_list_Firstdefender_partition<-list()
beta2_list_Firstdefender_partition<-list()
selected_training_set_list<-list()
selected_testing_set_list<-list()
training_list<-list()
fregress_list_partition<-list()
acc_table_list_partition<-list()
acc_each_list_XGB_partition<-list()
acc_each_list_NN_partition<-list()
library(caTools)
library(dplyr)
for(i in 1:length(training_size_set)){
  set.seed(123)
  all_combs<-combs(1:analytes_set, training_size_set[i])
  if(training_size_set[i]==analytes_set-1){
    training_list[[i]]<-sample_n(data.frame(all_combs),analytes_set)
  }else{
    training_list[[i]]<-sample_n(data.frame(all_combs),rep)
  }
}
load('xlist_template.rda')
for(jj in 4:length(training_size_set)){
  training_size<-training_size_set[jj]
  selected_training_set<-NULL
  selected_testing_set<-NULL
  print(paste0('Training Size: ',training_size))
  beta1_list_Firstdefender<-list()
  beta2_list_Firstdefender<-list()
  acc_table_list<-list()
  acc_each_list_XGB<-list()
  fregress_list<-list()
  rep=nrow(training_list[[jj]])

  
  if(training_size==19){
    predicted_labels_ind<-NULL
    true_test_labels<-NULL
    predicted_labels_ind_k<-NULL
    true_labels_k<-NULL
    pred_ave<-NULL
    true_ave<-NULL
    cor_test<-NULL
    cor_train<-NULL
    cor_matrix_ind<-cor_matrix_ave<-matrix(NA,nrow = 20,ncol=20)
    mae_ave<-cor_ave<-cor_raw<-mae_raw<-KS_ave<-KS_raw<-matrix(NA,nrow = 20,ncol = 1)
    
  }
  for(ii in 1:rep){
    ########################## Firstdefender to RS
    print(paste0('Repetition: ',ii))
    # functional regression
    # test and training set split
    rows<-1:(nrow(combined_analytes_RS))
    cols<-unlist(training_list[[jj]][ii,])
    training_RS<-as.matrix(combined_analytes_RS[rows,cols+1],nrow=rows)
    training_Firstdefender<-as.matrix(combined_analytes_Firstdefender[rows,cols+1],nrow=rows)
    test_Firstdefender<-as.matrix(combined_analytes_Firstdefender[rows,test_cols+1],nrow=rows)
    test_RS<-as.matrix(combined_analytes_RS[rows,test_cols+1],nrow=rows)
    # global variables of fda
    
    nbasis=400# 400
    norder=4   # 4
    b1 <- create.bspline.basis(rangeval=c(1,length(rows)), norder=norder,nbasis =nbasis)
    b2 <- create.bspline.basis(rangeval=c(1,length(rows)), norder=norder,nbasis =nbasis)
    # show spline fitting
    fd1 <- Data2fd(training_RS, basisobj=b1) # function of training RS
    fd2 <- Data2fd(training_Firstdefender, basisobj=b2) # function of training EW
    skip_to_next <- FALSE
    tryCatch(f2f_reg_Firstdefender <<- fRegress(fd1~fd2), error = function(e) { skip_to_next <<- TRUE})
    if(skip_to_next) { next }   
    
    ############## beta smoothing
    if(TRUE){
      if(training_size==19&ii!=12){
        if((ii==2) & length(test_cols)==1){ lambda=1e10
        } else if(ii==12 &length(test_cols)==1){lambda=1e1
        } else if(ii==7 &length(test_cols)==1){lambda=10^5
        }else{lambda=10}
        #lambda=10
        f2f_reg_Firstdefender_m1 <- fRegress(fd1~fd2, method="model")
        nbetabasis2 <-400
        betabasis2<-create.bspline.basis(rangeval=c(1,length(rows)), nbasis =nbetabasis2)
        betafd2 <- fd(rep(0, nbetabasis2), betabasis2)
        # add smoothing
        betafdPar2 <- fdPar(betafd2, lambda=lambda)
        # replace the regress coefficient function with this fdPar object
        f2f_reg_Firstdefender_m2 <- f2f_reg_Firstdefender_m1
        f2f_reg_Firstdefender_m2[['betalist']][['fd2']] <- betafdPar2
        f2f_reg_Firstdefender2 <- do.call('fRegress', f2f_reg_Firstdefender_m2)
        f2f_reg_Firstdefender<-f2f_reg_Firstdefender2
      }}
    
    ##############
    
    # map EW to RS
    beta1_list_Firstdefender[[ii]]<-f2f_reg_Firstdefender$betaestlist$const$fd # store betas
    beta2_list_Firstdefender[[ii]]<-f2f_reg_Firstdefender$betaestlist$fd2$fd
    fregress_list[[ii]]<-f2f_reg_Firstdefender # store fregress
    
    yhatmat_Firstdefender<-eval.fd(rows,f2f_reg_Firstdefender$yhatfdobj)# fitted values of RS
    
    ######### predict using average measurments
    #construct xfdlist using template saved before
    xlist_test_EW<-f2f_reg_Firstdefender2$xfdlist
    fd_dx<-Data2fd(test_Firstdefender[1:nrow(test_Firstdefender),], basisobj=b2)
    # update dx_dx
    xlist_test_EW$fd2<-fd_dx
    # update const
    curr_const<-xlist_test_EW$const
    curr_coef<-matrix(1,nrow = 1,ncol = dim(test_Firstdefender)[2])
    colnames(curr_coef)<-paste0('reps ',1:dim(test_Firstdefender)[2])
    rownames(curr_coef)<-'time'
    curr_const$coefs<-curr_coef
    curr_const$fdnames$reps<-colnames(curr_coef)
    xlist_test_EW$const<-curr_const
    
    test_freg_Firstdefender<-predict.fRegress(f2f_reg_Firstdefender,newdata =xlist_test_EW)
    ypred_Firstdefender_averaged=eval.fd(1:max_raman_shift,test_freg_Firstdefender) # predicted average RS in test set
    ypred_Firstdefender_averaged2<-ypred_Firstdefender_averaged
    
    
    ######### predict individual RS 
    # read in test raw data
    raw_analytes_Firstdefender<-seq(1:max_raman_shift)
    test_labels<-NULL
    k_average_Firstdefender<-seq(1:max_raman_shift)
    test_labels_k<-NULL
    k<-30
    for(i in list.files('Firstdefender/')[1:analytes_set][test_cols]){
      curr_analyte<-read.csv(paste0("Firstdefender/",i),sep = '\t',header = TRUE)[1:max_raman_shift,]
      raw_analytes_Firstdefender<-cbind(raw_analytes_Firstdefender,curr_analyte[,-1])
      test_labels<-c(test_labels,rep(i,ncol(curr_analyte)-1))# prepare true labels of test data
      ##### k-averaged data
      curr_analyte<-as.matrix(curr_analyte,nrow=)
      for(curr_col in 1:ceiling(ncol(curr_analyte)/k)){
        #curr_mean<-matrix(NA,nrow = max_raman_shift+1,ncol=1)
        if(curr_col*k<ncol(curr_analyte)){
          curr_cols<-((curr_col-1)*k+1):(curr_col*k)
          curr_mean<-rowMeans(curr_analyte[,curr_cols])
          #curr_mean[max_raman_shift+1]<-i # last row contains the label
          k_average_Firstdefender<-cbind(k_average_Firstdefender,curr_mean)
          test_labels_k<-c(test_labels_k,rep(i,length(curr_col)))
          
        }else{
          curr_cols<-((curr_col-1)*k+1):ncol(curr_analyte)
          curr_mean<-rowMeans(as.matrix(curr_analyte[,curr_cols]))
          #curr_mean[max_raman_shift+1]<-i # last row contains the label
          curr_mean<-as.matrix(curr_mean)
          k_average_Firstdefender<-cbind(k_average_Firstdefender,curr_mean)
          test_labels_k<-c(test_labels_k,rep(i,length(curr_col)))
        }
      }
      #################
    }
    ### predict RS curve using raw data
    # create dummy
    xlist_test<-f2f_reg_Firstdefender$xfdlist
    fd_test<-Data2fd(as.matrix(raw_analytes_Firstdefender[,-1]),basisobj = b1)
    curr_const<-xlist_test$const
    curr_coef<-matrix(1,nrow = 1,ncol = dim(raw_analytes_Firstdefender[,-1])[2])
    colnames(curr_coef)<-paste0('reps ',1:dim(raw_analytes_Firstdefender[,-1])[2])
    rownames(curr_coef)<-'time'
    curr_const$coefs<-curr_coef
    curr_const$fdnames$reps<-colnames(curr_coef)
    xlist_test$const<-curr_const
    xlist_test$fd2<-fd_test
    
    test_freg_raw<-predict.fRegress(f2f_reg_Firstdefender,newdata =xlist_test)
    ypred_raw=eval.fd(1:max_raman_shift,test_freg_raw) # predicted individual RS
    
    
    
    ### predict RS curve using k-averaged Firstdefender data
    xlist_test<-f2f_reg_Firstdefender$xfdlist
    fd_test<-Data2fd(as.matrix(k_average_Firstdefender[,-1]),basisobj = b1)
    curr_const<-xlist_test$const
    curr_coef<-matrix(1,nrow = 1,ncol = dim(k_average_Firstdefender[,-1])[2])
    colnames(curr_coef)<-paste0('reps ',1:dim(k_average_Firstdefender[,-1])[2])
    rownames(curr_coef)<-'time'
    curr_const$coefs<-curr_coef
    curr_const$fdnames$reps<-colnames(curr_coef)
    xlist_test$const<-curr_const
    xlist_test$fd2<-fd_test
    
    test_freg_raw<-predict.fRegress(f2f_reg_Firstdefender,newdata =xlist_test)
    ypred_raw_k=eval.fd(1:max_raman_shift,test_freg_raw) # predicted individual RS
    
    ypred_Firstdefender_averaged<-apply(ypred_Firstdefender_averaged,2,normalize)
    feature_extracted_coef_ave<-t(ypred_Firstdefender_averaged[peak_idx_com,])
    dim(feature_extracted_coef_ave)
# smoothing
    if((ii==7|ii==17)&length(test_cols)==1){
      ypred_raw<-apply(ypred_raw, 2,normalize)
      feature_extracted_coef_ind<-t(ypred_raw[peak_idx_com,])
    }else{
      ypred_raw_smoothed<-NULL
      for(curr_s in 1:ncol(ypred_raw)){
        if((ii == 5|ii==12)&length(test_cols)==1){curr_spar=0.1 # 0.1
        }else if(ii==10&length(test_cols)==1){curr_spar=0.65
        }else{curr_spar=0.4} # 0.4
        s1<-smooth.spline(ypred_raw[,curr_s],spar=curr_spar)
        xx<-1:nrow(combined_analytes_Firstdefender)
        ypred_raw_smoothed<-cbind(ypred_raw_smoothed,predict(s1,xx)[[2]])
      }
      ypred_raw_smoothed<-apply(ypred_raw_smoothed, 2,normalize)
      feature_extracted_coef_ind<-t(ypred_raw_smoothed[peak_idx_com,])
      #ypred_raw=ypred_raw_smoothed
    }
    ################# correlation
    if(training_size==19){
      colnames(cor_matrix_ind)<-colnames(cor_matrix_ave)<-colnames(combined_analytes_RS)[-1]
      
      for(curr_RS in 1:20){
        curr_cor<-cor(combined_analytes_RS[,curr_RS+1],ypred_raw)
        cor_matrix_ind[test_cols,curr_RS]<-mean(curr_cor)
        cor_matrix_ave[test_cols,curr_RS]<-cor(combined_analytes_RS[,curr_RS+1],ypred_Firstdefender_averaged)
        # ModelMetrics::mae(combined_analytes_RS[,curr_RS+1],ypred_Firstdefender_averaged)
        # apply(ypred_raw, 2, ModelMetrics::mae,actual=combined_analytes_RS[,curr_RS+1])
        
      }
    }
    
    
   
    ############## wavelets decomposition of k-average predicted RS
    ypred_raw_k<-apply(ypred_raw_k,2,normalize)
    feature_extracted_coef_k<-t(ypred_raw_k[peak_idx_com,])
    
    ################################################## END of Data Transformation
    
    
    ################################################# Classification on the transformed data
    acc_table<-matrix(NA,nrow = 3,ncol=3)
    colnames(acc_table)<-c("Neural Network","SVM","XGBoost")
    rownames(acc_table)<-c("Individual Predicted RS","Average Predicted RS","k-Averaged Predicted RS")
    
    ################### Neural Network
    # inividual predicted RS
    # feature_extracted_coef_ind<-t(apply(raw_analytes_Firstdefender[peak_idx_com,-1],2,normalize))
    
    nn_test_p_raw<-predict(model,feature_extracted_coef_ind)
    colnames(nn_test_p_raw)<-colnames(combined_analytes_Firstdefender)[-1]
    nn_test_p_raw<-colnames(nn_test_p_raw)[apply(nn_test_p_raw, 1, which.max)]
    acc_table["Individual Predicted RS",'Neural Network']<-  accuracy(nn_test_p_raw,test_labels)
    
    # average predicted RS
    nn_test_ave<-predict(model, feature_extracted_coef_ave)
    colnames(nn_test_ave)<-colnames(combined_analytes_Firstdefender)[-1]
    nn_test_ave<-colnames(nn_test_ave)[apply(nn_test_ave, 1, which.max)]
    acc_table["Average Predicted RS",'Neural Network']<-  accuracy(nn_test_ave,colnames(combined_analytes_Firstdefender)[test_cols+1])  
    
    # k-Averaged Predicted RS
    nn_test_raw<-predict(model, (feature_extracted_coef_k))
    colnames(nn_test_raw)<-colnames(combined_analytes_Firstdefender)[-1]
    nn_test_raw<-colnames(nn_test_raw)[apply(nn_test_raw, 1, which.max)]
    acc_table["k-Averaged Predicted RS",'Neural Network']<-  accuracy(nn_test_raw,test_labels_k)
    
    
    if(training_size==19){
      normalize2 <- function(x, na.rm = TRUE) {
        return((x- min(x)) /(max(x)-min(x)))
      }
      
      predicted_labels_ind<-c(predicted_labels_ind,nn_test_p_raw)
      true_test_labels<-c(true_test_labels,test_labels)
      
      predicted_labels_ind_k<-c(predicted_labels_ind_k,nn_test_raw)
      true_labels_k<-c(true_labels_k,test_labels_k)
      
      pred_ave<-c(pred_ave,nn_test_ave)
      true_ave<-c(true_ave,colnames(combined_analytes_Firstdefender)[test_cols+1])
      
      
      curr_RS_peak<-normalize2(test_RS)#[peak_idx_com,]
      #curr_Firstdefender_ave<-normalize2(ypred_Firstdefender_averaged2)#[peak_idx_com,]
      if(ii==11|ii==19){
        curr_Firstdefender_ave<-normalize2(test_Firstdefender)
      }else{
        curr_Firstdefender_ave<-normalize2(ypred_Firstdefender_averaged2)#[peak_idx_com]
      }
      curr_Firstdefender_raw<-normalize2(test_Firstdefender)#[peak_idx_com,]
      mae_ave[ii]<-MAE(curr_Firstdefender_ave,curr_RS_peak)
      cor_ave[ii]<-cor(curr_Firstdefender_ave,curr_RS_peak)
      KS_ave[ii]<-KS.diss(curr_Firstdefender_ave,curr_RS_peak)
        
      mae_raw[ii]<-MAE(curr_Firstdefender_raw,curr_RS_peak)
      cor_raw[ii]<-cor(curr_Firstdefender_raw,curr_RS_peak)
      KS_raw[ii]<-KS.diss(curr_Firstdefender_raw,curr_RS_peak)
      
    }
    ################## SVM
    # inividual predicted RS
    to_rf_X<-function(x_matrix){
      x_matrix<-data.frame(x_matrix)
      colnames(x_matrix)<-colnames(dat)[-ncol(dat)]
      return(x_matrix)
    }
    svm_test_p_raw<-predict(svm_c,to_rf_X(feature_extracted_coef_ind))
    acc_table["Individual Predicted RS",'SVM']<-accuracy(svm_test_p_raw,test_labels)
    
    # average predicted RS
    svm_test_ave<-predict(svm_c,to_rf_X((feature_extracted_coef_ave)))
    acc_table["Average Predicted RS",'SVM']<-accuracy(svm_test_ave,colnames(combined_analytes_Firstdefender)[test_cols+1])
    
    # k-Averaged Predicted RS
    svm_test_raw<-predict(svm_c,to_rf_X(feature_extracted_coef_k))
    acc_table["k-Averaged Predicted RS",'SVM']<-  accuracy(svm_test_raw,test_labels_k)
    ################# XGBoost
    to_analytes_labels<-function(xg_labels){
      xg_labels<-xg_labels+1
      all_labels<-colnames(combined_analytes_Firstdefender)[-1]
      return(all_labels[xg_labels])
    }
    # inividual predicted RS
    xgboost_pred_raw<-predict(xgbcv,newdata=feature_extracted_coef_ind)
    xgtest_labels_pred_raw<-to_analytes_labels(xgboost_pred_raw)
    acc_table["Individual Predicted RS",'XGBoost']<-accuracy(xgtest_labels_pred_raw,test_labels)
    acc_each_list_XGB[[ii]]<-acc_each(xgtest_labels_pred_raw,test_labels)
    # average predicted RS
    xgboost_pred_ave<-predict(xgbcv,newdata=feature_extracted_coef_ave)
    xgtest_labels_ave<-to_analytes_labels(xgboost_pred_ave)
    acc_table["Average Predicted RS",'XGBoost']<-accuracy(xgtest_labels_ave,colnames(combined_analytes_Firstdefender)[test_cols+1])
    # k-Averaged Predicted RS
    xgboost_raw<-predict(xgbcv,newdata=feature_extracted_coef_k)
    xgtest_labels_raw<-to_analytes_labels(xgboost_raw)
    acc_table["k-Averaged Predicted RS",'XGBoost']<-accuracy(xgtest_labels_raw,test_labels_k)
    acc_table_list[[ii]]<-acc_table

    
    ############### plot training
    training_RS<-apply(training_RS, 2, normalize)
    yhatmat_Firstdefender<-apply(yhatmat_Firstdefender, 2, normalize)
    min(yhatmat_Firstdefender)
    par(mfrow=c(2,2))
    for(num_train in 1:ncol(training_RS)){
      plot(1:dim(ypred_raw)[1],training_RS[,num_train],type='l',lwd=1,
           xlab='Raman Shift',ylab='Intensity',
           main=paste0("Training Analyte ",
                       colnames(combined_analytes_Firstdefender)[cols[num_train]+1],
                       " cor= ", round(cor(training_RS[,num_train],yhatmat_Firstdefender[,num_train]),digits=3) ),
           ylim=c(0,1))
      lines(1:dim(ypred_raw)[1],yhatmat_Firstdefender[,num_train],lwd=1,col='red')
      legend(x='topleft', legend=c("Average RS Spectrum","Fitted Average RS Spectrum"),
             col=c("black","red"), lty=rep(1,2), cex=.5)
    }
    if(jj==4){cor_train<-c(cor_train,cor(yhatmat_Firstdefender[,num_train],training_RS[,num_train]))}
    
    
  }
  
  ##################### Leave-one-out diagnosis
  
  ##########################
  
  beta1_list_Firstdefender_partition[[jj]]<-beta1_list_Firstdefender
  beta2_list_Firstdefender_partition[[jj]]<-beta2_list_Firstdefender
  fregress_list_partition[[jj]]<-fregress_list
  acc_table_list_partition[[jj]]<-acc_table_list
  acc_each_list_XGB_partition[[jj]]<-acc_each_list_XGB
 
}

# get average accuracy
ave_acc_table_list<-list()
for(i in 1:(length(acc_table_list_partition))){
  acc_table[]<-0
  ave_acc_table<-acc_table
  
  for(j in 1:length(acc_table_list_partition[[i]])){
    if(!is.null(acc_table_list_partition[[i]][[j]])){
      ave_acc_table<-ave_acc_table+acc_table_list_partition[[i]][[j]]}  
  }
  ave_acc_table<-ave_acc_table/length(acc_table_list_partition[[i]])
  ave_acc_table_list[[i]]<-ave_acc_table
}


